Environment Set-up
#is_gluonts_activated()
#get_python_env()
#check_gluonts_dependencies()
#detect_default_gluonts_env()
#remotes::install_github("business-science/modeltime.gluonts")
#modeltime.gluonts::install_gluonts()
# GluonTS Installation - Run 1st time
#modeltime.gluonts::install_gluonts(fresh_install = T, include_pytorch = F)
library(modeltime.gluonts)
## Loading required package: modeltime
## Warning: package 'modeltime' was built under R version 4.1.2
library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.1.2
## Registered S3 method overwritten by 'tune':
## method from
## required_pkgs.model_spec parsnip
## -- Attaching packages -------------------------------------- tidymodels 0.1.4 --
## v broom 0.7.11 v recipes 0.1.17
## v dials 0.0.10 v rsample 0.1.1
## v dplyr 1.0.7 v tibble 3.1.6
## v ggplot2 3.3.5 v tidyr 1.1.4
## v infer 1.0.0 v tune 0.1.6
## v modeldata 0.1.1 v workflows 0.2.4
## v parsnip 0.1.7 v workflowsets 0.1.0
## v purrr 0.3.4 v yardstick 0.0.9
## Warning: package 'dials' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'infer' was built under R version 4.1.2
## Warning: package 'modeldata' was built under R version 4.1.2
## Warning: package 'parsnip' was built under R version 4.1.2
## Warning: package 'recipes' was built under R version 4.1.2
## Warning: package 'rsample' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'tune' was built under R version 4.1.2
## Warning: package 'workflows' was built under R version 4.1.2
## Warning: package 'workflowsets' was built under R version 4.1.2
## Warning: package 'yardstick' was built under R version 4.1.2
## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x recipes::step() masks stats::step()
## * Learn how to get started at https://www.tidymodels.org/start/
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v readr 2.1.1 v forcats 0.5.1
## v stringr 1.4.0
## Warning: package 'readr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x readr::col_factor() masks scales::col_factor()
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x stringr::fixed() masks recipes::fixed()
## x dplyr::lag() masks stats::lag()
## x readr::spec() masks yardstick::spec()
library(timetk)
## Warning: package 'timetk' was built under R version 4.1.2
library(modeltime)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.2
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(mice)
## Warning: package 'mice' was built under R version 4.1.2
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
Project
# Load the weekly product time series. Blank cells (" ") are read as NA.
# `<-` used for assignment throughout (the original used `=`).
product_ts_cmb <- read.csv("product_ts_full.csv", na.strings = " ")
product_ts_cmb$ISOWEEKS <- as.Date(product_ts_cmb$ISOWEEKS)
product_ts_cmb$CATEGORY <- as.factor(product_ts_cmb$CATEGORY)
head(product_ts_cmb)
## WEEK ISOWEEKS YEARWEEK SUM PRICE PROMO CATEGORY
## 1 1 1989-01-01 1989-00 177538 0.575 1 FrontEnd Candies
## 2 1 1989-01-01 1989-00 170067 0.950 1 Frozen Juice
## 3 1 1989-01-01 1989-00 138771 1.880 1 Bottled Juice
## 4 1 1989-01-01 1989-00 478899 1.990 1 Soft Drink
## 5 2 1989-01-08 1989-01 174278 0.575 1 FrontEnd Candies
## 6 2 1989-01-08 1989-01 140251 1.040 1 Frozen Juice
# Keep only the modeling columns. The original piped through
# group_by(CATEGORY) %>% ungroup(), which is a no-op with no verb in
# between — the dead grouping step is removed here.
data <- product_ts_cmb %>%
  select(CATEGORY, ISOWEEKS, PRICE, PROMO, SUM)
# Faceted time-series plot of weekly sales (SUM) per category.
data %>%
  group_by(CATEGORY) %>%
  plot_time_series(ISOWEEKS, SUM,
                   .facet_ncol  = 3,
                   .interactive = FALSE)

# Faceted time-series plot of weekly PRICE per category.
data %>%
  group_by(CATEGORY) %>%
  plot_time_series(ISOWEEKS, PRICE,
                   .facet_ncol  = 3,
                   .interactive = FALSE)

# Forecast horizon: 52 weeks (one year of weekly observations).
HORIZON <- 52

# Extend each category's series HORIZON weeks into the future.
# `.date_var` is passed explicitly — the original relied on auto-detection
# and emitted ".date_var is missing. Using: ISOWEEKS".
new_data <- data %>%
  group_by(CATEGORY) %>%
  future_frame(.date_var = ISOWEEKS, .length_out = HORIZON) %>%
  ungroup()
## .date_var is missing. Using: ISOWEEKS
Data processing - Training and test
# Summary statistics of the modeling data, rendered as a pander table.
data_summary <- summary(data)
pander::pander(data_summary)
Table continues below
| Bath Soap :265 |
Min. :1989-01-01 |
Min. :0.000 |
Min. :0.0000 |
| Beer :303 |
1st Qu.:1991-05-26 |
1st Qu.:0.990 |
1st Qu.:1.0000 |
| Bottled Juice :392 |
Median :1993-02-07 |
Median :1.290 |
Median :1.0000 |
| FrontEnd Candies:396 |
Mean :1993-01-26 |
Mean :1.567 |
Mean :0.9911 |
| Frozen Juice :396 |
3rd Qu.:1994-12-02 |
3rd Qu.:1.970 |
3rd Qu.:1.0000 |
| Soft Drink :390 |
Max. :1996-08-18 |
Max. :6.315 |
Max. :1.0000 |
| Min. : 0 |
| 1st Qu.: 63770 |
| Median : 134317 |
| Mean : 228657 |
| 3rd Qu.: 200924 |
| Max. :1605204 |
Selecting Category for model training
#input category
# Restrict modeling to a single category (`<-` replaces the original `=`).
# Swap the string — or uncomment the line below — to model other slices.
filtereddata <- filter(data, CATEGORY == "FrontEnd Candies")
# filtereddata <- data   # all categories at once
PMM Imputation
# Treat zero sales/prices as missing so mice can impute them.
filtereddata$SUM[filtereddata$SUM == 0] <- NA
filtereddata$PRICE[filtereddata$PRICE == 0] <- NA

# Single imputation (m = 1) by predictive mean matching, 50 iterations,
# fixed seed for reproducibility. `method` is spelled out in full — the
# original used `meth=`, relying on partial argument matching.
imp <- mice(filtereddata, m = 1, maxit = 50, method = "pmm", seed = 500)
##
## iter imp variable
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## 7 1
## 8 1
## 9 1
## 10 1
## 11 1
## 12 1
## 13 1
## 14 1
## 15 1
## 16 1
## 17 1
## 18 1
## 19 1
## 20 1
## 21 1
## 22 1
## 23 1
## 24 1
## 25 1
## 26 1
## 27 1
## 28 1
## 29 1
## 30 1
## 31 1
## 32 1
## 33 1
## 34 1
## 35 1
## 36 1
## 37 1
## 38 1
## 39 1
## 40 1
## 41 1
## 42 1
## 43 1
## 44 1
## 45 1
## 46 1
## 47 1
## 48 1
## 49 1
## 50 1
## Warning: Number of logged events: 1
# Extract the first (and only) completed dataset from the mice object,
# then re-summarise to confirm the zeros/NAs were filled in.
filtereddata <- complete(imp, action = 1)
pander::pander(summary(filtereddata))
Table continues below
| Bath Soap : 0 |
Min. :1989-01-01 |
Min. :0.3300 |
Min. :0.0000 |
| Beer : 0 |
1st Qu.:1990-11-23 |
1st Qu.:0.4500 |
1st Qu.:1.0000 |
| Bottled Juice : 0 |
Median :1992-10-14 |
Median :0.4900 |
Median :1.0000 |
| FrontEnd Candies:396 |
Mean :1992-10-18 |
Mean :0.5376 |
Mean :0.9823 |
| Frozen Juice : 0 |
3rd Qu.:1994-09-12 |
3rd Qu.:0.6162 |
3rd Qu.:1.0000 |
| Soft Drink : 0 |
Max. :1996-08-18 |
Max. :2.0000 |
Max. :1.0000 |
| Min. : 93343 |
| 1st Qu.:148348 |
| Median :172761 |
| Mean :178825 |
| 3rd Qu.:200363 |
| Max. :473181 |
# Autocorrelation of weekly sales out to 156 lags (3 years of weeks);
# acf() draws the correlogram, print() lists the numeric values.
sum_acf <- acf(filtereddata$SUM, lag.max = 156)
print(sum_acf)

##
## Autocorrelations of series 'filtereddata$SUM', by lag
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1.000 0.485 0.473 0.409 0.504 0.388 0.349 0.405 0.296 0.320 0.291
## 11 12 13 14 15 16 17 18 19 20 21
## 0.310 0.229 0.273 0.245 0.280 0.233 0.185 0.176 0.153 0.148 0.159
## 22 23 24 25 26 27 28 29 30 31 32
## 0.133 0.138 0.101 0.082 0.118 0.091 0.148 0.093 0.169 0.092 0.164
## 33 34 35 36 37 38 39 40 41 42 43
## 0.109 0.172 0.140 0.193 0.153 0.125 0.164 0.143 0.222 0.165 0.210
## 44 45 46 47 48 49 50 51 52 53 54
## 0.148 0.236 0.199 0.234 0.232 0.219 0.261 0.281 0.301 0.230 0.236
## 55 56 57 58 59 60 61 62 63 64 65
## 0.265 0.222 0.200 0.190 0.175 0.164 0.121 0.160 0.137 0.134 0.093
## 66 67 68 69 70 71 72 73 74 75 76
## 0.125 0.073 0.075 0.066 0.063 0.104 0.023 0.048 -0.007 0.099 0.011
## 77 78 79 80 81 82 83 84 85 86 87
## 0.018 0.051 0.063 0.015 0.004 0.064 -0.005 -0.006 0.001 0.063 0.003
## 88 89 90 91 92 93 94 95 96 97 98
## 0.011 0.030 0.046 0.054 0.055 0.063 0.073 0.052 0.091 0.090 0.108
## 99 100 101 102 103 104 105 106 107 108 109
## 0.060 0.133 0.112 0.133 0.139 0.169 0.170 0.136 0.147 0.099 0.068
## 110 111 112 113 114 115 116 117 118 119 120
## 0.088 0.095 0.047 0.040 0.036 0.052 -0.018 -0.020 -0.024 -0.015 -0.048
## 121 122 123 124 125 126 127 128 129 130 131
## -0.057 -0.102 -0.104 -0.103 -0.111 -0.075 -0.123 -0.089 -0.160 -0.082 -0.135
## 132 133 134 135 136 137 138 139 140 141 142
## -0.120 -0.060 -0.057 -0.077 -0.102 -0.052 -0.112 -0.082 -0.089 -0.045 -0.042
## 143 144 145 146 147 148 149 150 151 152 153
## -0.029 -0.059 -0.062 -0.051 -0.020 -0.026 -0.035 -0.043 -0.031 -0.017 -0.032
## 154 155 156
## -0.003 0.004 0.029
# Autocorrelation of weekly PRICE out to 156 lags (3 years of weeks).
price_acf <- acf(filtereddata$PRICE, lag.max = 156)
print(price_acf)

##
## Autocorrelations of series 'filtereddata$PRICE', by lag
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1.000 0.667 0.584 0.431 0.457 0.430 0.410 0.428 0.390 0.451 0.418
## 11 12 13 14 15 16 17 18 19 20 21
## 0.440 0.389 0.433 0.438 0.440 0.433 0.377 0.344 0.354 0.369 0.356
## 22 23 24 25 26 27 28 29 30 31 32
## 0.292 0.298 0.303 0.309 0.316 0.287 0.271 0.265 0.288 0.263 0.269
## 33 34 35 36 37 38 39 40 41 42 43
## 0.270 0.303 0.285 0.287 0.276 0.254 0.263 0.218 0.189 0.177 0.202
## 44 45 46 47 48 49 50 51 52 53 54
## 0.179 0.188 0.230 0.268 0.271 0.290 0.298 0.286 0.242 0.224 0.206
## 55 56 57 58 59 60 61 62 63 64 65
## 0.189 0.187 0.132 0.167 0.169 0.199 0.191 0.227 0.228 0.241 0.195
## 66 67 68 69 70 71 72 73 74 75 76
## 0.191 0.146 0.124 0.111 0.109 0.105 0.093 0.073 0.066 0.072 0.079
## 77 78 79 80 81 82 83 84 85 86 87
## 0.075 0.066 0.068 0.048 0.043 0.043 0.056 0.050 0.061 0.044 0.042
## 88 89 90 91 92 93 94 95 96 97 98
## 0.032 0.045 0.037 -0.021 -0.019 -0.038 -0.028 -0.009 0.000 -0.006 -0.006
## 99 100 101 102 103 104 105 106 107 108 109
## -0.014 -0.025 -0.032 -0.031 -0.034 -0.046 -0.059 -0.068 -0.087 -0.058 -0.054
## 110 111 112 113 114 115 116 117 118 119 120
## -0.042 -0.067 -0.066 -0.055 -0.041 -0.041 -0.062 -0.063 -0.063 -0.067 -0.087
## 121 122 123 124 125 126 127 128 129 130 131
## -0.068 -0.070 -0.068 -0.077 -0.065 -0.055 -0.061 -0.061 -0.065 -0.060 -0.063
## 132 133 134 135 136 137 138 139 140 141 142
## -0.066 -0.077 -0.081 -0.094 -0.079 -0.053 -0.030 -0.022 -0.037 -0.059 -0.055
## 143 144 145 146 147 148 149 150 151 152 153
## -0.067 -0.081 -0.083 -0.082 -0.058 -0.054 -0.071 -0.092 -0.089 -0.082 -0.065
## 154 155 156
## -0.069 -0.075 -0.083
Feature Engineering
# Feature-engineering recipe:
#  - calendar signature from ISOWEEKS, with sub-weekly/iso/xts parts removed
#  - ordered-factor week-of-year, one-step lags of SUM / PRICE / PROMO
#  - dummy variables for nominal predictors (originals kept), normalized
#    trend columns, and US holiday indicator features.
# `keep_original_cols = TRUE` spelled out — `T` is a reassignable binding,
# not a reserved word.
recipe_spec <- recipe(SUM ~ ., filtereddata) %>%
  step_timeseries_signature(ISOWEEKS) %>%
  step_rm(matches("(iso$)|(xts$)|(day)|(hour)|(min)|(sec)|(am.pm)")) %>%
  step_mutate(Date_week = factor(ISOWEEKS_week, ordered = TRUE)) %>%
  step_lag(SUM, lag = 1, default = 1) %>%
  step_lag(PRICE, lag = 1, default = 1) %>%
  step_lag(PROMO, lag = 1, default = 1) %>%
  step_dummy(all_nominal(), keep_original_cols = TRUE) %>%
  step_normalize(contains("index.num"), ISOWEEKS_year) %>%
  step_holiday_signature(ISOWEEKS,
                         holiday_pattern = "^US_",
                         locale_set = "US")

# Materialize the recipe on its training data to inspect the features.
recipe_spec %>% prep() %>% juice()
## # A tibble: 396 x 111
## CATEGORY ISOWEEKS PRICE PROMO SUM ISOWEEKS_index.~ ISOWEEKS_year
## <fct> <date> <dbl> <int> <int> <dbl> <dbl>
## 1 FrontEnd Candies 1989-01-01 0.575 1 177538 -1.72 -1.50
## 2 FrontEnd Candies 1989-01-08 0.575 1 174278 -1.71 -1.50
## 3 FrontEnd Candies 1989-01-15 0.625 1 169752 -1.70 -1.50
## 4 FrontEnd Candies 1989-01-22 0.7 1 172676 -1.69 -1.50
## 5 FrontEnd Candies 1989-01-29 0.7 1 167725 -1.69 -1.50
## 6 FrontEnd Candies 1989-02-05 0.425 1 169454 -1.68 -1.50
## 7 FrontEnd Candies 1989-02-12 0.4 0 137547 -1.67 -1.50
## 8 FrontEnd Candies 1989-02-19 0.575 1 133348 -1.66 -1.50
## 9 FrontEnd Candies 1989-02-26 0.575 1 154926 -1.65 -1.50
## 10 FrontEnd Candies 1989-03-05 0.575 1 174732 -1.64 -1.50
## # ... with 386 more rows, and 104 more variables: ISOWEEKS_half <int>,
## # ISOWEEKS_quarter <int>, ISOWEEKS_month <int>, ISOWEEKS_month.lbl <ord>,
## # ISOWEEKS_mweek <int>, ISOWEEKS_week <int>, ISOWEEKS_week2 <int>,
## # ISOWEEKS_week3 <int>, ISOWEEKS_week4 <int>, Date_week <ord>,
## # lag_1_SUM <dbl>, lag_1_PRICE <dbl>, lag_1_PROMO <dbl>, CATEGORY_Beer <dbl>,
## # CATEGORY_Bottled.Juice <dbl>, CATEGORY_FrontEnd.Candies <dbl>,
## # CATEGORY_Frozen.Juice <dbl>, CATEGORY_Soft.Drink <dbl>, ...
Split and Settings
# Hold out the last 2 weeks as the test set; training is cumulative.
splits <- time_series_split(
  filtereddata,
  assess = "2 weeks",   # test-set length (evaluation forecast horizon)
  cumulative = TRUE
)

# Visualize the train/test split. The original call ended with a trailing
# comma (`.facet_ncol= 2,`), which creates an empty argument; removed.
# `.interactive = TRUE` spelled out (`T` is reassignable).
splits %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(ISOWEEKS, SUM,
                           .interactive = TRUE,
                           .facet_ncol  = 2)

# Periods the GluonTS models must predict.
# NOTE(review): this is nrow(testing) + 1 = 3, not the 52-week HORIZON
# defined earlier — confirm which horizon is intended, and why the +1.
future <- nrow(testing(splits)) + 1
Seed Settings
# Seed MXNet's RNG for reproducible GluonTS training.
# BUG FIX: mxnet.random.seed is a *function*. The original wrote
# `mxnet$random$seed <- 123`, which replaced the function with the number
# 123 and never seeded the RNG; it must be called instead.
mxnet <- reticulate::import("mxnet")
mxnet$random$seed(123L)
Model 1: ARIMA
# Model 1: automatic ARIMA, fitted directly on the raw training data
# (exogenous regressors come from the remaining columns via SUM ~ .).
arima_spec <- arima_reg() %>% set_engine(engine = "auto_arima")
model_fit_arima_no_boost <- fit(arima_spec, SUM ~ ., data = training(splits))
## frequency = 13 observations per 1 quarter
Model 2: Exponential Smoothing
# Model 2: ETS exponential smoothing — multiplicative error and season,
# additive trend. The spec is left unfitted here; fitting happens inside
# a workflow further down.
model_fit_ets <- exp_smoothing(
  # seasonal_period = 52,   # left to auto-detection
  error  = "multiplicative",
  trend  = "additive",
  season = "multiplicative"
) %>%
  set_engine(engine = "ets")
Model 3: Prophet
# Model 3: Prophet specification with multiplicative seasonality.
# `FALSE` spelled out (`F` is reassignable) and `<-` used for assignment.
# NOTE(review): weekly seasonality is enabled while yearly is disabled on
# weekly-aggregated data — confirm this combination is intentional.
model_spec_prophet <- prophet_reg(
  seasonality_weekly = TRUE,
  seasonality_yearly = FALSE,
  season = "multiplicative"
)
Model 4: Random Forest
# Model 4: random forest regression — 500 trees, minimum node size 50.
model_spec_rf <- set_engine(
  rand_forest(mode = "regression", trees = 500, min_n = 50),
  "randomForest"
)
Model 5: DeepAR - LSTM
# Model 5: GluonTS DeepAR with the default LSTM cell.
# `scale = FALSE` spelled out (`F` is reassignable); `<-` for assignment.
model_spec_deepar_lstm <- deep_ar(
  id                = "CATEGORY",
  freq              = "W",                # weekly frequency
  prediction_length = future,             # periods to forecast (test rows + 1)
  lookback_length   = 3 * HORIZON,        # 3 years of weekly context
  num_layers        = 1,
  epochs            = 5,
  scale             = FALSE
) %>%
  set_engine("gluonts_deepar")
Model 6: DeepAR - GRU
# Model 6: GluonTS DeepAR variant using a GRU cell instead of LSTM;
# all other hyperparameters match the LSTM spec above.
# `scale = FALSE` spelled out (`F` is reassignable); `<-` for assignment.
model_spec_deepar_gru <- deep_ar(
  id                = "CATEGORY",
  freq              = "W",
  prediction_length = future,
  lookback_length   = 3 * HORIZON,
  num_layers        = 1,
  epochs            = 5,
  cell_type         = "gru",
  scale             = FALSE
) %>%
  set_engine("gluonts_deepar")
Fitted Workflow
#ARIMA
#wflw_fit_arima_no_boost <- workflow() %>%
#add_model(model_fit_arima_no_boost) %>%
#add_recipe(recipe_spec %>% step_rm(all_nominal()) ) %>%
#fit(training(splits))
#ETS
# Fit the ETS spec through a workflow so the recipe features are applied.
wflw_fit_ets <- workflow() %>%
  add_recipe(recipe_spec) %>%
  add_model(model_fit_ets) %>%
  fit(training(splits))
## frequency = 13 observations per 1 quarter
# Fit the Prophet spec through the shared feature-engineering recipe.
wflw_fit_prophet <- workflow() %>%
  add_recipe(recipe_spec) %>%
  add_model(model_spec_prophet) %>%
  fit(training(splits))
## Warning: Engine set to `prophet`.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# Random-forest workflow. The raw ISOWEEKS date column is dropped (the
# calendar features derived from it remain in the recipe). Bare column
# name used in step_rm — string selection is deprecated in recipes.
workflow_fit_rf <- workflow() %>%
  add_model(model_spec_rf) %>%
  add_recipe(recipe_spec %>% step_rm(ISOWEEKS)) %>%
  fit(training(splits))
# Fit the DeepAR-LSTM spec on the training window.
wflw_fit_deepar_lstm <- workflow() %>%
  add_recipe(recipe_spec) %>%
  add_model(model_spec_deepar_lstm) %>%
  fit(training(splits))
# Fit the DeepAR-GRU spec on the training window.
wflw_fit_deepar_gru <- workflow() %>%
  add_recipe(recipe_spec) %>%
  add_model(model_spec_deepar_gru) %>%
  fit(training(splits))
Sub Model Evaluation
# Collect every fitted model into one modeltime table and score each one
# on the held-out test window.
submodels_tbl <- modeltime_table(
  model_fit_arima_no_boost,   # ARIMA was fitted directly, not via workflow
  wflw_fit_ets,
  wflw_fit_prophet,
  wflw_fit_deepar_lstm,
  wflw_fit_deepar_gru,
  workflow_fit_rf
)

submodels_tbl %>%
  modeltime_accuracy(testing(splits)) %>%
  table_modeltime_accuracy(.interactive = FALSE)
| .model_id |
.model_desc |
.type |
mae |
mape |
mase |
smape |
rmse |
rsq |
| 1 |
REGRESSION WITH ARIMA(0,1,1) ERRORS |
Test |
3407.03 |
2.48 |
0.51 |
2.50 |
3563.17 |
1 |
| 2 |
ETS(M,AD,M) |
Test |
4951.54 |
3.62 |
0.75 |
3.65 |
5104.60 |
1 |
| 3 |
PROPHET W/ REGRESSORS |
Test |
13194.58 |
9.54 |
1.99 |
10.18 |
15776.37 |
1 |
| 4 |
DEEPAR |
Test |
3108.36 |
2.32 |
0.47 |
2.29 |
3620.94 |
1 |
| 5 |
DEEPAR |
Test |
3998.95 |
2.91 |
0.60 |
2.96 |
4354.12 |
1 |
| 6 |
RANDOMFOREST |
Test |
11475.13 |
8.49 |
1.73 |
8.12 |
11897.19 |
1 |
# Forecast the test window for every submodel, overlaid on the actuals,
# faceted by category. `TRUE` spelled out twice (`T` is reassignable).
# NOTE(review): the chunk warns that .conf_hi/.conf_lo are absent —
# running modeltime_calibrate() before forecasting would add confidence
# intervals; confirm whether intervals are wanted here.
submodels_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = filtereddata,
    keep_data   = TRUE
  ) %>%
  group_by(CATEGORY) %>%
  plot_modeltime_forecast(
    .interactive      = TRUE,
    .facet_ncol       = 2,
    .legend_max_width = 8
  )
## Warning: Expecting the following names to be in the data frame: .conf_hi, .conf_lo.
## Proceeding with '.conf_interval_show = FALSE' to visualize the forecast without confidence intervals.
## Alternatively, try using `modeltime_calibrate()` before forecasting to add confidence intervals.